from numpy import *
import operator


__author__ = 'zjw'


def fileToMatrix(fileName):
    """
    Load a tab-separated data file into a feature matrix and a label vector.

    Every non-blank line is split on tabs; its first three fields become one
    row of features and its last field becomes the label of that row.
    Blank lines (for example a trailing empty line) are ignored.

    :param fileName: path of the data file to read
    :return:
        eigenvalueMatrix: float array of shape (rows, 3) holding the features
        typeMatrix: float array of shape (rows,) holding the labels
    :raises ValueError: if a field cannot be converted to a float
    """
    # The context manager closes the file even when parsing fails below.
    with open(fileName) as dataFile:
        # Keep only lines that still contain text after stripping whitespace.
        rows = [line.strip().split("\t") for line in dataFile if line.strip()]
    numOfLines = len(rows)
    eigenvalueMatrix = zeros((numOfLines, 3))
    typeMatrix = zeros(numOfLines)
    for index, fields in enumerate(rows):
        # Convert explicitly instead of relying on implicit str -> float casts.
        eigenvalueMatrix[index, 0:3] = [float(value) for value in fields[0:3]]
        typeMatrix[index] = float(fields[-1])
    return eigenvalueMatrix, typeMatrix


def eigenvalueNormalization(eigenvalueMatrix):
    """
    Rescale every feature column so that all features carry equal weight.

    Each value is mapped with (value - min) / (max - min), where min and max
    are taken per column. A column whose values are all equal (max == min)
    is mapped to 0 instead of being divided by zero.

    :param eigenvalueMatrix: NumPy array of feature values, one sample per row
    :return:
        normEigenvalueMatrix: normalized matrix with the same shape as the input
        ranges: per-column max - min (returned unmodified, so it may hold zeros)
        minValues: per-column minimum
    """
    # Reducing over axis 0 yields one maximum / minimum per column.
    maxValues = eigenvalueMatrix.max(0)
    minValues = eigenvalueMatrix.min(0)
    ranges = maxValues - minValues
    # Constant columns have a zero numerator already; divide them by 1, not 0.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row at once.
    normEigenvalueMatrix = (eigenvalueMatrix - minValues) / safeRanges
    return normEigenvalueMatrix, ranges, minValues


def classify(toBeJudged, normEigenvalueMatrix, typeMatrix, k):
    """
    Predict the type of one sample by majority vote of its k nearest neighbours.

    Neighbours are ranked by Euclidean distance between the sample and each
    row of the training matrix. When several labels share the highest vote
    count, the one that was counted first (i.e. met at the smaller distance
    rank) is returned.

    :param toBeJudged: feature vector of the sample to classify
    :param normEigenvalueMatrix: normalized feature matrix, one training sample per row
    :param typeMatrix: label of each training row, indexed like normEigenvalueMatrix
    :param k: number of nearest neighbours that vote; capped at the number of
        training rows
    :return: the label that received the most votes
    :raises IndexError: if no neighbour votes (k < 1 or no training rows)
    """
    samples = asarray(normEigenvalueMatrix)
    numOfRows = len(samples)
    # Broadcasting subtracts the query vector from every training row.
    differences = samples - asarray(toBeJudged, dtype=float)
    distances = (differences ** 2).sum(axis=1) ** 0.5
    # Row indices ordered from the nearest to the farthest training sample.
    nearestFirst = distances.argsort()
    # Cap k so that asking for more neighbours than rows does not overrun.
    # A conditional is used rather than min(): the star import from numpy
    # may shadow the builtin.
    numOfVoters = k if k < numOfRows else numOfRows
    eachOfCount = {}
    for rank in range(numOfVoters):
        label = typeMatrix[nearestFirst[rank]]
        eachOfCount[label] = eachOfCount.get(label, 0) + 1
    # The sort is stable, so tied labels keep the order in which they were counted.
    sortedEachOfCount = sorted(eachOfCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedEachOfCount[0][0]


if __name__ == '__main__':
    # Interactive demo: train on the dating data set, read one person's three
    # feature values from the console and print the predicted category.
    eigenvalueMatrix, typeMatrix = fileToMatrix('datingTestSet2.txt')
    normEigenvalueMatrix, ranges, minValues = eigenvalueNormalization(eigenvalueMatrix)
    # Prompts ask for: frequent-flyer miles per year, percentage of time spent
    # on video games, and litres of ice cream consumed per year.
    dis = float(input("每年获得的飞行常客历程数："))
    per = float(input("玩视频游戏所耗时间百分比："))
    ice = float(input("每年消费冰淇淋公升数："))
    toBeJudged = array([dis, per, ice])
    # Scale the query with the training minimum and range; a zero range
    # (constant training column) is replaced by 1 to avoid dividing by zero.
    safeRanges = where(ranges == 0, 1, ranges)
    normJudged = (toBeJudged - minValues) / safeRanges
    # Named predictedType so the builtin `type` is not rebound at module scope.
    predictedType = classify(normJudged, normEigenvalueMatrix, typeMatrix, 3)
    typeOfPeople = ['不喜欢的人', '魅力一般的人', '极具魅力的人']
    # The label is shifted by one to index the list, i.e. labels are treated as 1-based.
    print(typeOfPeople[int(predictedType) - 1])
